#!/usr/bin/env python3
# coding=utf-8
"""
Automatically update and merge hosts files into the system hosts file.
Filename: updateHosts.py
usage: python updateHosts.py
version: 23.3.24
python version: 3.6+
"""
import re
import sys
import os
import time
import datetime
import platform
import configparser
import argparse
import urllib.request

class bcolors:
    """ANSI escape sequences for colouring and styling terminal output.

    Wrap a message as ``bcolors.FAIL + text + bcolors.ENDC`` so the style is
    switched off again after the text.
    """

    # Reset: turns every colour/style attribute off.
    ENDC = '\033[0m'

    # Text styles.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Bright foreground colours (SGR codes 91-96).
    FAIL = '\033[91m'       # red
    OKGREEN = '\033[92m'    # green
    WARNING = '\033[93m'    # yellow
    OKBLUE = '\033[94m'     # blue
    HEADER = '\033[95m'     # magenta
    OKCYAN = '\033[96m'     # cyan

# IP address that every blocked host name is mapped to.
BLOCK_IP = '127.0.0.1'

# Path of the script as it was invoked.
script_path = sys.argv[0]

# Directory that contains the script; './' when the invocation path has no
# directory component.
SCRIPT_PATH = os.path.dirname(script_path) or './'

# Directory whose files list the hosts to block.
DIR_HOSTBLOCK = os.path.realpath(os.sep.join((SCRIPT_PATH, 'hosts.block')))

# Directory whose files are merged into the hosts file unchanged.
DIR_HOSTS = os.path.realpath(os.sep.join((SCRIPT_PATH, 'hosts')))

# White list: hosts that are never blocked.
white_file = os.path.join(SCRIPT_PATH, 'whitehost.txt')

# Location of the system hosts file, keyed by the value of platform.system().
HOSTS_PATHS = {
    'Windows': r'C:\WINDOWS\system32\drivers\etc\hosts',
    'Linux': '/etc/hosts',
    'Darwin': '/private/etc/hosts',
}

def get_files(_dir):
    """Return the paths of all files below ``_dir``, searched recursively.

    The tree is walked bottom-up, so files in sub-directories are listed
    before files in their parent directory. A directory that does not exist
    yields an empty list.
    """
    return [
        os.path.join(folder, filename)
        for folder, _subdirs, filenames in os.walk(_dir, topdown=False)
        for filename in filenames
    ]



def request_web_hosts(url, timeout=30):
    """Download a hosts file from ``url`` and return it as a labelled text block.

    Args:
        url: Address of the remote hosts file.
        timeout: Seconds to wait on blocking network operations before giving
            up. Without a timeout an unresponsive server would stall the
            whole update indefinitely.

    Returns:
        ``'#web_host:<url>\\n<content>\\n\\n'`` with Windows line endings in the
        content converted to ``\\n``, or an empty string when the download or
        the UTF-8 decoding fails.
    """
    print('[request] '+url)
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            _content = response.read().decode('utf-8')
    except Exception as e:
        # Best effort: one unreachable or broken source must not abort the
        # whole update, so report the problem and contribute nothing.
        print('ERROR request web hosts file failed %s'%e)
        return ''

    return '#web_host:'+url+'\n'+_content.replace('\r\n','\n')+'\n\n'

class Main():
    """Merge hosts entries from several sources into the system hosts file.

    The generated section is assembled from, in this order:
      1. every URL listed in the ``web_host`` section of the config file,
      2. every file below DIR_HOSTS, copied as-is,
      3. every file below DIR_HOSTBLOCK, where each non-empty line that is not
         in the white list becomes a ``BLOCK_IP<TAB>line`` entry.
    build() wraps that text in ``ksc_start``/``ksc_end`` marker lines and uses
    it to replace a previously generated section of the hosts file.
    """

    def __init__(self, config_file:str ) -> None:
        """Remember ``config_file`` and try to parse it right away."""
        self.config_file = config_file
        self.cf = None  # ConfigParser, set once the config file was found

        self.copy_str = []  # text chunks that are copied verbatim

        self.white_listhost = []  # host names that must not be blocked
        self.block_list = []  # generated lines for the blocked hosts
        self.content = ""  # final text to insert into the hosts file

        self.parse_configfile()

    def parse_configfile(self):
        """Load ``self.config_file`` into ``self.cf``.

        Returns:
            True when the file was read, False when it is missing or could
            not be parsed. A missing file leaves ``self.cf`` as None.
        """
        if not os.path.isfile(self.config_file):
            print(bcolors.FAIL+'config file not exits:'+self.config_file+bcolors.ENDC)
            return False
        print(f"[config] {self.config_file}")
        self.cf = configparser.ConfigParser()
        try:
            self.cf.read(self.config_file)
        except Exception as e:
            print(bcolors.FAIL+'Parse config file failed'+str(e)+bcolors.ENDC)
            return False
        return True

    def parse_white_list(self, file):
        """Return the stripped, non-empty lines of ``file``.

        A path that is not an existing file yields an empty list.
        """
        if not os.path.isfile(file):
            return []
        with open(file, 'r') as f:
            stripped = (line.strip() for line in f)
            return [entry for entry in stripped if entry]

    def get_blocked_hosts(self):
        """Load the white list and collect block entries from DIR_HOSTBLOCK."""
        # Must be stored under the attribute that _in_white_list() reads;
        # otherwise the white list is loaded but never applied.
        self.white_listhost = self.parse_white_list(white_file)
        for file in get_files(DIR_HOSTBLOCK):
            print('[block] '+os.path.basename(file))
            self.block_list.extend(self.parse_blocked_hosts_fromfile(file))
            # Separator element between the entries of two block files.
            self.block_list.append('\n')

    def parse_blocked_hosts_fromfile(self, file):
        """Turn one block file into hosts lines.

        Returns:
            A list starting with a ``#host.block/<file name>`` header,
            followed by ``BLOCK_IP<TAB>line`` for every non-empty line of
            ``file`` that is not in the white list.
        """
        _lines = ['#host.block/'+os.path.basename(file)]
        with open(file, 'r') as _h:
            for line in _h:
                line = line.strip()
                if line == '' or self._in_white_list(line):
                    continue
                _lines.append(BLOCK_IP + '\t' + line)

        return _lines

    def _in_white_list(self, host):
        """Return True when ``host`` is an entry of the white list."""
        return host in self.white_listhost

    def parse_host_web(self):
        """Download every URL of the config's ``web_host`` section into copy_str."""
        if self.cf is None or not self.cf.has_section('web_host'):
            return
        host_web = [self.cf.get('web_host', _opt) for _opt in self.cf.options('web_host')]
        for _url in host_web:
            self.copy_str.append(request_web_hosts(_url))

    def copy_hosts_dir(self):
        """Append every file below DIR_HOSTS to copy_str unchanged.

        Each file contributes a ``#host/<file name>`` header, its stripped
        text and a blank-line separator. At most 1024*1024 characters are
        read per file.
        """
        for file in get_files(DIR_HOSTS):
            print(f'[copy] {file}')
            with open(file, 'r') as h:
                self.copy_str.append('#host/'+os.path.basename(file)+'\n')
                self.copy_str.append(h.read(1024*1024).strip())
                self.copy_str.append('\n\n')

    def build_content(self):
        """Assemble ``self.content`` from copy_str and block_list.

        Lines of the form ``{domain} rest`` are variable lines: when an
        ``IP domain`` pair for that domain occurs anywhere in the collected
        text, the ``{domain}`` part is replaced by that IP. Variable lines
        that cannot be resolved, or that are malformed, are kept unchanged.
        The result is wrapped in the start/end marker lines that build()
        looks for.
        """
        SIGN_NOTICE="""#Those contents was created by updateHosts.py ,\n#Do not change it directly or it will be overwritten after run updateHosts.py\n\n"""

        SIGN_STAT ='#----ksc_start-----'
        SIGN_END  ='#----ksc_end-----'
        _content = '\n'.join(self.copy_str)+"\n".join(self.block_list)

        # Map every host name to the IP it is listed with; a later
        # occurrence of the same name overrides an earlier one.
        pattern = r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+([\w\.-]+)"
        domain_variables = {domain: ip for ip, domain in re.findall(pattern, _content)}

        variable_re = re.compile(r"\{([\w\.-]+)}(.*)")
        lines = []
        for line in _content.split('\n'):
            if line.startswith('{'):
                # A line may start with '{' without being a valid variable
                # line (e.g. a missing '}'); leave such lines alone instead
                # of failing on the missing match.
                found = variable_re.search(line)
                ip = domain_variables.get(found.group(1)) if found else None
                if ip:
                    line = ip+' '+found.group(2)
                    print('parsed domain_variable: '+line)
            lines.append(line)
        _now = datetime.datetime.now()
        self.content = SIGN_STAT+'\n'+f"#Updated at {_now}"+'\n'+SIGN_NOTICE+'\n'.join(lines)+'\n'+SIGN_END

    def build(self, args):
        """Generate the managed section and write it into the system hosts file.

        Args:
            args: Parsed command-line options. ``args.show`` prints the
                generated section; ``args.dry`` skips writing the file.

        Lines of the existing hosts file from the one containing
        ``ksc_start`` through the one containing ``ksc_end`` are replaced by
        the new section. When no start marker exists the section is
        appended. Write errors are reported, not raised.
        """
        self.parse_host_web()
        self.copy_hosts_dir()
        self.get_blocked_hosts()
        self.build_content()

        if args.show:
            print(f'\n\nINSERT CONTENT: \n{self.content}')

        system = platform.system()
        hosts_path = HOSTS_PATHS.get(system)
        if hosts_path is None:
            print(bcolors.FAIL+'unsupported platform: '+system+bcolors.ENDC)
            return

        # Read with the same encoding that is used for writing below, so the
        # file this script produced can be read back on any locale.
        # surrogateescape round-trips bytes that are not valid UTF-8.
        with open(hosts_path, 'r', encoding='utf8', errors='surrogateescape') as hosts_file:
            lines = hosts_file.readlines()

        need_insert = True
        hosts_list = []
        is_filter = False  # True while skipping the old generated section
        for _line in lines:
            if 'ksc_start' in _line:
                hosts_list.append(self.content+'\n')
                need_insert = False
                is_filter = True
            elif is_filter:
                if 'ksc_end' in _line:  # end marker: stop skipping
                    is_filter = False
            else:
                hosts_list.append(_line)

        if need_insert:
            hosts_list.append('\n'+self.content)

        if args.dry:
            return
        try:
            with open(hosts_path, 'w', encoding='utf8', errors='surrogateescape') as h:
                h.write(''.join(hosts_list))
        except Exception as e:
            # Typically a permission problem; report it without a traceback.
            print(bcolors.FAIL+str(e)+bcolors.ENDC)
        else:
            print(bcolors.OKCYAN+"success"+bcolors.ENDC)
            
# Parse the command-line arguments.
parser = argparse.ArgumentParser(description='Update the hosts file.')
parser.add_argument('config', default='config.ini', nargs='?',
                    help='path of the config file; a relative path is resolved '
                         'against the script directory (default: config.ini)')
parser.add_argument('--show', default=False, action='store_true', help='show the inserted content')
parser.add_argument('--dry', default=False, action='store_true', help='not insert')
args = parser.parse_args()

# Running the `color` command makes cmd.exe interpret ANSI escape codes.
# The command exists only on Windows; elsewhere it would just print a
# "command not found" error from the shell.
if platform.system() == 'Windows':
    os.system("color")

# Config file: a relative path is resolved against the script directory,
# not the current working directory.
config_file = args.config
if not os.path.isabs(config_file):
    config_file = os.path.realpath(SCRIPT_PATH+os.sep+config_file)

# Build the managed section and update the hosts file.
main = Main(config_file)
main.build(args)